package com.msh.starter.pinying.instance;

import com.msh.frame.client.define.StringDef;
import com.msh.frame.client.exception.ServiceException;
import com.msh.frame.common.util.StringUtil;
import com.msh.frame.interfaces.IPinying;
import com.msh.starter.pinying.interfaces.IPinyingIdx;
import java.util.HashSet;
import java.util.Locale;
import java.util.Set;
import lombok.extern.slf4j.Slf4j;
import net.sourceforge.pinyin4j.PinyinHelper;
import net.sourceforge.pinyin4j.format.HanyuPinyinCaseType;
import net.sourceforge.pinyin4j.format.HanyuPinyinOutputFormat;
import net.sourceforge.pinyin4j.format.HanyuPinyinToneType;
import net.sourceforge.pinyin4j.format.HanyuPinyinVCharType;
import net.sourceforge.pinyin4j.format.exception.BadHanyuPinyinOutputFormatCombination;
import org.springframework.util.StringUtils;

/**
 * {@link IPinyingIdx} implementation backed by pinyin4j.
 *
 * <p>Converts Chinese text to space-separated pinyin and derives sets of search-index keys
 * (suffixes and first-letter abbreviations) from either the pinyin or the raw text.
 */
@Slf4j
public class DefaultIPinying implements IPinyingIdx {
  /** First character of the range treated as Chinese by {@link #convert(String)} (U+4E00). */
  private static final char HANZI_FIRST = '\u4E00';

  /** Last character of the range treated as Chinese by {@link #convert(String)} (U+9FA5). */
  private static final char HANZI_LAST = '\u9FA5';

  /** Output format handed to pinyin4j; configured once in the constructor. */
  private final HanyuPinyinOutputFormat outputFormat = new HanyuPinyinOutputFormat();

  /** Configures the pinyin output as lower case, without tones, with ü written as "u:". */
  public DefaultIPinying() {
    // Letter case of the output:
    //   LOWERCASE - lower-case output
    //   UPPERCASE - upper-case output
    outputFormat.setCaseType(HanyuPinyinCaseType.LOWERCASE);

    // Tone representation:
    //   WITH_TONE_MARK   - diacritic tone marks (requires WITH_U_UNICODE, otherwise pinyin4j throws)
    //   WITH_TONE_NUMBER - tone written as a digit 1-4
    //   WITHOUT_TONE     - no tone information
    outputFormat.setToneType(HanyuPinyinToneType.WITHOUT_TONE);

    // Representation of ü:
    //   WITH_V           - written as "v"
    //   WITH_U_AND_COLON - written as "u:"
    //   WITH_U_UNICODE   - written as the ü character itself
    outputFormat.setVCharType(HanyuPinyinVCharType.WITH_U_AND_COLON);
  }

  /**
   * Converts Chinese characters to pinyin, one space-terminated token per input character.
   *
   * <p>Characters in the range U+4E00..U+9FA5 are replaced by their first pinyin reading; every
   * other character is copied unchanged. Each token, including the last one, is followed by a
   * single space. Example: {@code "8年X"} becomes {@code "8 nian X "}.
   *
   * @param hanzi text to convert; leading and trailing whitespace is trimmed first
   * @return the space-separated pinyin form of {@code hanzi}
   */
  @Override
  public String convert(String hanzi) {
    char[] chars = hanzi.trim().toCharArray();
    StringBuilder pinYin = new StringBuilder(chars.length * 4);

    try {
      for (char c : chars) {
        String reading = null;
        if (c >= HANZI_FIRST && c <= HANZI_LAST) {
          // A polyphonic character yields several readings; only the first one is used.
          String[] readings = PinyinHelper.toHanyuPinyinStringArray(c, outputFormat);
          // pinyin4j returns null when it has no mapping for the character.
          if (readings != null && readings.length > 0) {
            reading = readings[0];
          }
        }
        if (reading != null) {
          pinYin.append(reading);
        } else {
          // Not Chinese, or no known reading: keep the character as it is.
          pinYin.append(c);
        }
        pinYin.append(' ');
      }
    } catch (BadHanyuPinyinOutputFormatCombination e) {
      log.warn("拼音转换失败, param: {}", hanzi, e);
      // NOTE(review): doThrow is presumably expected to raise; if it returns, the partial
      // result built so far is returned below.
      ServiceException.doThrow("拼音转换失败");
    }
    return pinYin.toString();
  }

  /**
   * Builds index keys from space-separated pinyin.
   *
   * <p>The input is trimmed and lower-cased, then for every token position the set receives
   * the first-letter abbreviation of the tokens from that position on, and the concatenation
   * (spaces removed) of the tokens from that position on. Every key is passed through
   * {@code StringUtil.overLengthCut(key, maxLen)} before being added.
   *
   * <p>Example: {@code "8 nian ji A ban"} yields
   * {@code [ab, b, nianjiaban, aban, jab, njab, ban, 8nianjiaban, jiaban, 8njab]}.
   *
   * @param pinyin space-separated pinyin, e.g. the output of {@link #convert(String)}
   * @param maxLen length limit forwarded to {@code StringUtil.overLengthCut}
   * @return a mutable set of index keys
   */
  @Override
  public Set<String> convertPinyinIdx(String pinyin, int maxLen) {
    // Locale.ROOT keeps the keys independent of the JVM default locale.
    String remaining = pinyin.trim().toLowerCase(Locale.ROOT);
    String[] tokens = remaining.split(StringDef.SPACE);
    // At most one abbreviation and one suffix per token; sized for the default load factor.
    Set<String> index = new HashSet<>(Math.max(16, tokens.length * 3));

    // Collect the first letter of every non-empty token once, remembering where each token's
    // contribution starts, so the abbreviation from token i onwards is a plain suffix.
    StringBuilder initials = new StringBuilder(tokens.length);
    int[] initialsStart = new int[tokens.length];
    for (int i = 0; i < tokens.length; i++) {
      initialsStart[i] = initials.length();
      String token = tokens[i];
      if (StringUtils.hasLength(token)) {
        String first = token.substring(0, 1);
        if (!StringDef.SPACE.equals(first)) {
          initials.append(first);
        }
      }
    }
    String allInitials = initials.toString();
    for (int i = 0; i < tokens.length; i++) {
      index.add(StringUtil.overLengthCut(allInitials.substring(initialsStart[i]), maxLen));
    }

    // Full pinyin with the spaces removed, then the same for each suffix obtained by
    // dropping one leading token at a time.
    index.add(
        StringUtil.overLengthCut(remaining.replaceAll(StringDef.SPACE, StringDef.EMPTY), maxLen));
    int spaceAt = remaining.indexOf(StringDef.SPACE);
    while (spaceAt >= 0) {
      remaining = remaining.substring(spaceAt + 1);
      if (!StringUtils.hasLength(remaining)) {
        break;
      }
      index.add(
          StringUtil.overLengthCut(remaining.replaceAll(StringDef.SPACE, StringDef.EMPTY), maxLen));
      spaceAt = remaining.indexOf(StringDef.SPACE);
    }
    if (StringUtils.hasLength(remaining)) {
      index.add(StringUtil.overLengthCut(remaining, maxLen));
    }
    return index;
  }

  /**
   * Builds index keys from the raw text: one key per suffix of {@code str}.
   *
   * <p>The text is used exactly as given (no trimming or case change); every suffix is passed
   * through {@code StringUtil.overLengthCut(suffix, maxLen)} before being added. Example: for
   * the already lower-cased input {@code "8年级a班"} the suffixes are
   * {@code [级a班, a班, 班, 年级a班, 8年级a班]}.
   *
   * @param str text to index
   * @param maxLen length limit forwarded to {@code StringUtil.overLengthCut}
   * @return a mutable set of index keys
   */
  @Override
  public Set<String> convertHanziIdx(String str, int maxLen) {
    int length = str.length();
    Set<String> index = new HashSet<>(length * 2);
    for (int start = 0; start < length; start++) {
      index.add(StringUtil.overLengthCut(str.substring(start), maxLen));
    }
    return index;
  }

  /**
   * Builds the combined index: raw-text suffixes plus pinyin-derived keys.
   *
   * <p>The input is trimmed and lower-cased, then the results of
   * {@link #convertHanziIdx(String, int)} on the text and
   * {@link #convertPinyinIdx(String, int)} on its {@link #convert(String) pinyin form} are
   * merged. Example: {@code "8年级A班"} yields
   * {@code [ab, b, nianjiaban, aban, 级a班, jab, a班, 班, njab, ban, 8nianjiaban, 年级a班,
   * 8年级a班, jiaban, 8njab]}.
   *
   * @param str text to index
   * @param maxLen length limit forwarded to {@code StringUtil.overLengthCut}
   * @return a mutable set containing both kinds of index keys
   */
  @Override
  public Set<String> convertIdx(String str, int maxLen) {
    // Locale.ROOT keeps the keys independent of the JVM default locale.
    String normalized = str.trim().toLowerCase(Locale.ROOT);
    Set<String> hanziKeys = convertHanziIdx(normalized, maxLen);
    Set<String> allKeys = convertPinyinIdx(this.convert(normalized), maxLen);
    allKeys.addAll(hanziKeys);
    return allKeys;
  }
}
